

* ---------------------------------------------------------------------------
* Build $temp/sample_comparison_data: the raw application file merged with the
* estimation data, plus one 0/1 membership flag per comparison sample.
* ---------------------------------------------------------------------------

* Estimation data, collapsed to one row per applicant x job x application year.
use "$basedata/estimationdata/All_T_J/teach_pref_est_data.dta", clear

collapse ///
	(sum)  estsampall choiceset applied current_lea next_school current_school ///
	(mean) mu_jt_preY_mean_school_ma mu_jt_m1_hat_preY_ma mu_jt_m2_hat_preY_ma ///
	       commute_time va_ma_preY frac_black frac_hisp p_school_m2_ma mean_n_ma ncerdc_exp ///
	(max)  applied_date PostingDate female black hispanic titleI itidx jtidx, ///
	by(applicant_id job_id app_year)

tempfile tempestimationdata
save `tempestimationdata', replace

* Raw application file: keep only the merge keys and the sample-defining fields.
use "$basedata/FOCAL_applications", clear
keep job_type applicant_id job_id app_year oncycle elem_title

* 0/1 indicator for job_type 1; note it evaluates to 0 when job_type is missing.
generate instructional = (job_type == 1)

sort applicant_id job_id app_year
merge 1:1 applicant_id job_id app_year using `tempestimationdata'
rename _merge estimation_merge

* Log check only. `unique` is not built into Stata (presumably the SSC package
* of that name -- confirm it is installed).
unique applicant_id app_year if app_year == 2015 & estsampall == 1

* Rows found only in the raw file have no estsampall; treat them as outside the
* estimation sample, then confirm the flag is strictly 0/1 after the collapse.
replace estsampall = 0 if estimation_merge == 1
assert inlist(estsampall, 0, 1)

* Sample flags. estimation_merge is 1 (raw file only), 2 (estimation data only)
* or 3 (both), because the merge above is run without the update option.
generate raw_data            = inlist(estimation_merge, 1, 3)
generate estimation_all_data = inlist(estimation_merge, 2, 3)
generate estimation_VA_data  = (estsampall == 1)
generate estimation_CF_data  = (estsampall == 1 & app_year == 2015)

* The three estimation samples drop any row explicitly coded 0 on on-cycle,
* instructional or elementary. Rows where those fields are missing are kept.
foreach flag of varlist estimation_all_data estimation_VA_data estimation_CF_data {
	replace `flag' = 0 if inlist(0, oncycle, instructional, elem_title)
}

save "$temp/sample_comparison_data", replace

** application level
* Builds matrix a (5 rows x 4 sample columns): a blank panel-header row, the
* number of applications, and the means of the on-cycle, instructional and
* elementary indicators.

use "$temp/sample_comparison_data", clear

* applied_all is 1 on every raw_data row and the collapsed `applied` total
* elsewhere; it is then blanked on estimation_all_data rows with applied == 0,
* so the N row below counts only rows where applied_all is non-missing.
generate applied_all = cond(raw_data == 1, 1, applied)
replace applied_all = . if estimation_all_data == 1 & applied == 0

* Placeholder variables: their names become the matrix row names and their
* labels carry the row text.
forvalues k = 1/5 {
	generate var`k' = .
}
label variable var1 "\emph{\textbf{Applications}}"
label variable var2 "N"
label variable var3 "On-Cycle"
label variable var4 "Instructional"
label variable var5 "Elementary"

local samplist "raw_data estimation_all_data estimation_VA_data estimation_CF_data"
local countvars "applied_all"
local meanvars "oncycle instructional elem_title"

* Row 1 is the empty panel header.
matrix m1 = J(1, 4, .)

* One 1x4 row matrix per statistic, filled left to right across the samples.
local row = 1

foreach v of local countvars {
	local ++row
	local col = 0
	foreach s of local samplist {
		local ++col
		quietly count if `v' != . & `s' == 1
		if `col' == 1 {
			matrix m`row' = (r(N))
		}
		else {
			matrix m`row' = (m`row', r(N))
		}
	}
}

foreach v of local meanvars {
	local ++row
	local col = 0
	foreach s of local samplist {
		local ++col
		quietly summarize `v' if `s' == 1
		if `col' == 1 {
			matrix m`row' = (r(mean))
		}
		else {
			matrix m`row' = (m`row', r(mean))
		}
	}
}

* Name each row after its placeholder variable and stack the rows into a.
forvalues k = 1/5 {
	matrix rownames m`k' = var`k'
	if `k' == 1 {
		matrix a = m1
	}
	else {
		matrix a = a \ m`k'
	}
}


** applicant level
* Builds matrix b (15 rows x 4 sample columns) from data collapsed to one row
* per applicant x application year.

use "$temp/sample_comparison_data", clear

* A row flagged as the applicant's current school is not counted as a move.
replace next_school = 0 if current_school == 1

collapse ///
	(max)  raw_data estimation_all_data estimation_VA_data estimation_CF_data ///
	       next_school female black hispanic ///
	(sum)  choiceset applied ///
	(mean) current_lea mu_jt_m1_hat_preY_ma mu_jt_m2_hat_preY_ma commute_time ///
	       va_ma_preY ncerdc_exp mu_jt_preY_mean_school_ma, ///
	by(applicant_id app_year)

* Derived measures: the m2-minus-m1 gap and applications per choice-set entry.
generate CA = mu_jt_m2_hat_preY_ma - mu_jt_m1_hat_preY_ma
generate app_rate = applied / choiceset

* One id per applicant-year; never missing, so the N row counts collapsed rows.
egen itidx = group(applicant_id app_year)

* Placeholder variables: their names become the matrix row names and their
* labels carry the row text.
forvalues k = 6/20 {
	generate var`k' = .
}
label variable var6 "\emph{\textbf{Applicants}}"
label variable var7 "N"
label variable var8 "Female"
label variable var9 "Black"
label variable var10 "Hispanic"
label variable var11 "In-District"
label variable var12 "Choice Set Size"
label variable var13 "Application Rate"
label variable var14 "Transferred"
label variable var15 "Mean Commute Time"
label variable var16 "Experience"
label variable var17 "VA Econ Adv"
label variable var18 "VA Econ Disadv"
label variable var19 "Abs Adv"
label variable var20 "Comp Adv in Econ Disadv"

local samplist "raw_data estimation_all_data estimation_VA_data estimation_CF_data"
local countvars "itidx"
local meanvars "female black hispanic current_lea choiceset app_rate next_school commute_time ncerdc_exp mu_jt_m1_hat_preY_ma mu_jt_m2_hat_preY_ma mu_jt_preY_mean_school_ma CA"

* Row 6 is the empty panel header.
matrix m6 = J(1, 4, .)

* One 1x4 row matrix per statistic, filled left to right across the samples.
local row = 6

foreach v of local countvars {
	local ++row
	local col = 0
	foreach s of local samplist {
		local ++col
		quietly count if `v' != . & `s' == 1
		if `col' == 1 {
			matrix m`row' = (r(N))
		}
		else {
			matrix m`row' = (m`row', r(N))
		}
	}
}

foreach v of local meanvars {
	local ++row
	local col = 0
	foreach s of local samplist {
		local ++col
		quietly summarize `v' if `s' == 1
		* The first (raw_data) column is stored as missing even though the
		* summarize above still runs.
		* NOTE(review): presumably deliberate because these covariates exist
		* only in the estimation data -- confirm.
		if `col' == 1 {
			matrix m`row' = (.)
		}
		else {
			matrix m`row' = (m`row', r(mean))
		}
	}
}

* Name each row after its placeholder variable and stack the rows into b.
forvalues k = 6/20 {
	matrix rownames m`k' = var`k'
	if `k' == 6 {
		matrix b = m6
	}
	else {
		matrix b = b \ m`k'
	}
}


** position level
* Builds matrix c (8 rows x 4 sample columns) from data collapsed to one row
* per job x application year. The labelled placeholders var1-var28 are also
* left in memory, because the export step asks frmttable for variable labels.

use "$temp/sample_comparison_data", clear

local samplist "raw_data estimation_all_data estimation_VA_data estimation_CF_data"

* Sample-specific copies of the choice-set and application counts: each row's
* value is kept only where that sample's flag is 1.
foreach s of local samplist {
	generate cs_`s' = choiceset * `s'
	generate ap_`s' = applied * `s'
}

collapse ///
	(max)  raw_data estimation_all_data estimation_VA_data estimation_CF_data ///
	(sum)  cs_* ap_* choiceset applied ///
	(mean) frac_black frac_hisp p_school_m2_ma mean_n_ma, ///
	by(job_id app_year)

generate app_rate = applied / choiceset

* Sample-specific application rate; missing when the sample-specific choice
* set total is zero.
foreach s of local samplist {
	generate ar_`s' = ap_`s' / cs_`s'
}

* One id per job-year; never missing, so the N row counts collapsed rows.
egen jtidx = group(job_id app_year)

* Placeholder variables for every row of the final table (all three panels).
forvalues k = 1/28 {
	generate var`k' = .
}

label variable var1 "\emph{\textbf{Applications}}"
label variable var2 "N"
label variable var3 "On-Cycle"
label variable var4 "Instructional"
label variable var5 "Elementary"

label variable var6 "\emph{\textbf{Applicants}}"
label variable var7 "N"
label variable var8 "Female"
label variable var9 "Black"
label variable var10 "Hispanic"
label variable var11 "In-District"
label variable var12 "Choice Set Size"
label variable var13 "Application Rate"
label variable var14 "Transferred"
label variable var15 "Mean Commute Time"
label variable var16 "Experience"
label variable var17 "VA Econ Adv"
label variable var18 "VA Econ Disadv"
label variable var19 "Abs Adv"
label variable var20 "Comp Adv in Econ Disadv"

label variable var21 "\emph{\textbf{Positions}}"
label variable var22 "N"
label variable var23 "Choice Set Size"
label variable var24 "Application Rate"
label variable var25 "Mean Class Size"
label variable var26 "Frac Econ Disadv"
label variable var27 "Frac Black"
label variable var28 "Frac Hispanic"

local countvars "jtidx"
* "cs" and "ar" are stubs: the variable summarized is cs_<sample> / ar_<sample>.
local meanvars "cs ar mean_n_ma p_school_m2_ma frac_black frac_hisp"

* Row 21 is the empty panel header.
matrix m21 = J(1, 4, .)

* One 1x4 row matrix per statistic, filled left to right across the samples.
local row = 21

foreach v of local countvars {
	local ++row
	local col = 0
	foreach s of local samplist {
		local ++col
		quietly count if `v' != . & `s' == 1
		if `col' == 1 {
			matrix m`row' = (r(N))
		}
		else {
			matrix m`row' = (m`row', r(N))
		}
	}
}

foreach v of local meanvars {
	local ++row
	local col = 0
	foreach s of local samplist {
		local ++col
		* Resolve the stub names to their sample-specific variable.
		local target `v'
		if inlist("`v'", "cs", "ar") {
			local target `v'_`s'
		}
		quietly summarize `target' if `s' == 1
		* The first (raw_data) column is stored as missing even though the
		* summarize above still runs.
		* NOTE(review): presumably deliberate because these covariates exist
		* only in the estimation data -- confirm.
		if `col' == 1 {
			matrix m`row' = (.)
		}
		else {
			matrix m`row' = (m`row', r(mean))
		}
	}
}

* Name each row after its placeholder variable and stack the rows into c.
forvalues k = 21/28 {
	matrix rownames m`k' = var`k'
	if `k' == 21 {
		matrix c = m21
	}
	else {
		matrix c = c \ m`k'
	}
}

* Stack the three panels (applications, applicants, positions) into one
* 28-row matrix and write it out as a TeX fragment. Row text comes from the
* labels of var1-var28, which must still be in memory at this point.
matrix d = a \ b \ c

* sdec() gives the decimal places per row: 0 for panel headers and N rows,
* 2 for every mean.
* NOTE(review): the last column is titled "2016 Sample" but its flag is built
* on app_year==2015 -- confirm the title refers to the intended year.
frmttable using "$tables/sample_summary_stats", statmat(d) replace varlabels tex fragment ///
	ctitles("","Full Sample","Elementary Sample","Value-Added Sample","2016 Sample") ///
	sdec(0,0,0,0 \ 0,0,0,0 \ 2,2,2,2 \ 2,2,2,2 \ 2,2,2,2 \ /// rows 1-5 applications panel
	     0,0,0,0 \ 0,0,0,0 \ /// rows 6-7 applicants header and N
	     2,2,2,2 \ 2,2,2,2 \ 2,2,2,2 \ 2,2,2,2 \ 2,2,2,2 \ /// rows 8-12
	     2,2,2,2 \ 2,2,2,2 \ 2,2,2,2 \ 2,2,2,2 \ 2,2,2,2 \ /// rows 13-17
	     2,2,2,2 \ 2,2,2,2 \ 2,2,2,2 \ /// rows 18-20
	     0,0,0,0 \ 0,0,0,0 \ /// rows 21-22 positions header and N
	     2,2,2,2 \ 2,2,2,2 \ 2,2,2,2 \ 2,2,2,2 \ 2,2,2,2 \ 2,2,2,2)